#Figure A2

#directory
#set working directory to the path PvP_Replication
#setwd("~/PvP_Replication")

#packages
library(tidyverse) #for data cleaning and manipulation
library(kbal) #for calculating kernel balancing weights
library(estimatr) #for estimation
library(MatchIt) # for matching
library(WeightIt) # for entropy weights

#load main dataset
pvp_data <- read.csv("PvP_data/PvP_data_main.csv")


#transform independent variable
#create binary (hectares > 100) cultivation variable
pvp_data <- pvp_data %>% 
  mutate(cultbinary = ifelse(maxcult > 100, 1, 0))

#balancing model

#transform selected covariates
#create variable for highways per square km, percent of population in rural zones, log population
pvp_data <- pvp_data %>% 
  mutate(hwycover = (hwylengthkm/sqkm), ruralpct = (ruralpop14/pop14)*100, logpop = log(pop14))

#list of covariates for kernel balancing
covariate_list <- c("lit_ratio", "electric_ratio", "logpop", "ruralpct", "elev_stdev", "hwycover", "distCali", "distBogota", "distMedellin")

#subset data to FARC municipalities for main anaylses
pvp_farc_subset <- pvp_data %>% filter(farc_presence == 1)

#note that there are two cases missing covariates
pvp_farc_subset %>% 
  dplyr::select(municode, all_of(covariate_list), dissident_presence, cultbinary) %>% 
  filter(!complete.cases(.)) %>% 
  count()

#drop the two incomplete cases
pvp_farc_subset <- pvp_farc_subset %>% 
  filter(complete.cases(pvp_farc_subset[ , c(covariate_list, 'dissident_presence', 'cultbinary')]))


#generate weights using kernel balancing
pvp_farc_subset$kbalweights <- kbal(allx = pvp_farc_subset[,covariate_list],
                                    treatment = pvp_farc_subset$cultbinary,
                                    fullSVD = TRUE, meanfirst = F)$w

#create custom names for plotting covariates
varname_df <- data.frame(old = c("lit_ratio", "elev_stdev", "hwycover", "logpop", "electric_ratio", "ruralpct", "distCali", "distBogota", "distMedellin", "elev_stdev * ruralpct"), new = c("Literacy", "Rough Terrain", "Roads", "Population", "Rural", "Electricity",  "Cali Dist.", "Bogota Dist.", "Medellin Dist.", "*Rural x Terrain"))


#generate weights using entropy balancing
pvp_farc_subset$ebalweights <- weightit(formula = cultbinary ~ lit_ratio + electric_ratio + logpop + ruralpct + elev_stdev + hwycover + distCali + distBogota + distMedellin, data = pvp_farc_subset, method = "ebal", missing = "ind", estimand = "ATT")$weights 

#nearest-neighbor matching using mahalanobis distance
matching_df <- match.data(matchit(formula = cultbinary ~ lit_ratio + electric_ratio + logpop + ruralpct + elev_stdev + hwycover + distCali + distBogota + distMedellin, data = pvp_farc_subset, method = "nearest", distance = "mahalanobis", estimand = "ATT"))

#estimate models

#no weights
dim_95 <- lm_robust(dissident_presence ~ cultbinary, pvp_farc_subset, alpha = 0.05)
dim_90 <- lm_robust(dissident_presence ~ cultbinary, pvp_farc_subset, alpha = 0.1)

#kernel weights
kbal_95 <- lm_robust(dissident_presence ~ cultbinary, weights = kbalweights, pvp_farc_subset, alpha = 0.05)
kbal_90 <- lm_robust(dissident_presence ~ cultbinary, weights = kbalweights, pvp_farc_subset, alpha = 0.1)

#entropy weights
ebal_95 <- lm_robust(dissident_presence ~ cultbinary, weights = ebalweights, pvp_farc_subset, alpha = 0.05)
ebal_90 <- lm_robust(dissident_presence ~ cultbinary, weights = ebalweights, pvp_farc_subset, alpha = 0.1)

#matching
match_95 <- lm_robust(dissident_presence ~ cultbinary, matching_df, clusters = subclass, alpha = 0.05)
match_90 <- lm_robust(dissident_presence ~ cultbinary, matching_df, clusters = subclass, alpha = 0.1)

#combine results in a dataframe
balance_methods_results <- rbind(
  data.frame(Method = "Difference-in-Means", Conf_Level = "95%", Estimate = dim_95$coefficients['cultbinary'], Conf_Low = dim_95$conf.low['cultbinary'], Conf_High = dim_95$conf.high['cultbinary']), 
  data.frame(Method = "Difference-in-Means", Conf_Level = "90%", Estimate = dim_90$coefficients['cultbinary'], Conf_Low = dim_90$conf.low['cultbinary'], Conf_High = dim_90$conf.high['cultbinary']), 
  data.frame(Method = "Kernel Weights", Conf_Level = "95%", Estimate = kbal_95$coefficients['cultbinary'], Conf_Low = kbal_95$conf.low['cultbinary'], Conf_High = kbal_95$conf.high['cultbinary']), 
  data.frame(Method = "Kernel Weights", Conf_Level = "90%", Estimate = kbal_90$coefficients['cultbinary'], Conf_Low = kbal_90$conf.low['cultbinary'], Conf_High = kbal_90$conf.high['cultbinary']), 
  data.frame(Method = "Entropy Weights", Conf_Level = "95%", Estimate = ebal_95$coefficients['cultbinary'], Conf_Low = ebal_95$conf.low['cultbinary'], Conf_High = ebal_95$conf.high['cultbinary']), 
  data.frame(Method = "Entropy Weights", Conf_Level = "90%", Estimate = ebal_90$coefficients['cultbinary'], Conf_Low = ebal_90$conf.low['cultbinary'], Conf_High = ebal_90$conf.high['cultbinary']), 
  data.frame(Method = "Matching", Conf_Level = "95%", Estimate = match_95$coefficients['cultbinary'], Conf_Low = match_95$conf.low['cultbinary'], Conf_High = match_95$conf.high['cultbinary']), 
  data.frame(Method = "Matching", Conf_Level = "90%", Estimate = match_90$coefficients['cultbinary'], Conf_Low = match_90$conf.low['cultbinary'], Conf_High = match_90$conf.high['cultbinary'])
)

balance_methods_results$Method <- factor(balance_methods_results$Method, levels = c("Difference-in-Means", "Kernel Weights", "Entropy Weights", "Matching"))

#plot results
balance_methods_plot <- ggplot(balance_methods_results %>% 
                                 pivot_wider(names_from = "Conf_Level", values_from = c(Estimate, Conf_Low, Conf_High)),  
                               aes(y = reorder(Method, desc(Method)), x = `Estimate_95%`, color = Method, shape = Method)) +
  geom_vline(xintercept = 0, 
             colour = gray(1/2), lty = 2) +
  scale_color_manual(values = c("#0072B2", "#D55E00", "#009E73", "#CC79A7")) +
  geom_point(aes(y = reorder(Method, desc(Method)), 
                 x = `Estimate_95%`), position=position_dodge(.5), size = 3) + 
  geom_linerange(aes(y = reorder(Method, desc(Method)), 
                     xmin = `Conf_Low_90%`,
                     xmax = `Conf_High_90%`),
                 lwd = 1, position=position_dodge(.5)) + 
  geom_linerange(aes(y = reorder(Method, desc(Method)), 
                     xmin = `Conf_Low_95%`,
                     xmax = `Conf_High_95%`),
                 lwd = 1/2, position=position_dodge(.5)) +
  xlim(0, 0.75) +
  labs(x = "", y = "") +
  theme_minimal() +
  theme(legend.position="none")

#save plot
ggsave(plot = balance_methods_plot, filename="PvP_plots/balance_methods_plot.pdf", width = 6,
       height = 4, units = "in")

